# This code replicates Figure 2
rm(list=ls())
library(foreign)
setwd("")
data <- read.csv("census.csv", sep = ";") 

## Cleaning of variables

# Variables/worker_15plus
data[, 30:31] <- data[, 30:31]/data[, 29]

# Variables/analphabets_total
data[, 21:22] <- data[, 21:22]/data[, 20]

# Variables/households
data[, 51:52] <- data[, 51:52]/data[, 50]
data[, 63:101] <- data[, 63:101]/data[, 50]

# Calculating average household size
data[, 62] <- data[, 2] / data[, 61]

# Variables/population
data[, 3:20] <- data[, 3:20]/data[, 2]
data[, 23:30] <- data[, 23:30]/data[, 2]
data[, 32:49] <- data[, 32:49]/data[, 2]
data[, 54:59] <- data[, 54:59]/data[, 2]

write.csv(data, "census_new.csv")

## Additional restructuring

# Dropping headhousehold = household and extracting non numerical variables
data <- data[ -c(50) ]
data_non_num <- data[ c(1, 102)]
data <- data[ -c(1, 102)]

# Calculating colsums to drop columns with no variation in observations
col <- c(colSums(data[1:99]))!=0
data <- data[col]
data <- data[- c(90)]
var <- colnames(data)

# Bringing nonnumericals back in
data <- cbind(data_non_num, data)

# Creating treatment variables
is.pentecostal <-  data$Group=="Peruana" | data$Group=="Maranatha"
data$treat_pentecostal=0
data$treat_pentecostal[is.pentecostal]=1

is.evangelical <-  data$Group=="Adventist" | data$Group=="Mixed"
data$treat_evangelical=0
data$treat_evangelical[is.evangelical]=1

is.peruana <-  data$Group=="Peruana" 
data$treatment_peruana=0
data$treatment_peruana[is.peruana]=1

is.maranatha <- data$Group=="Maranatha"
data$treatment_maranatha=0
data$treatment_maranatha[is.maranatha ]=1

is.adventist <- data$Group=="Adventist"
data$treatment_adventist=0
data$treatment_adventist[is.adventist]=1

is.mixed <- data$Group=="Mixed"
data$treatment_mixed=0
data$treatment_mixed[is.mixed]=1

is.catholic <- data$Group=="Catholic"
data$treatment_catholic=0
data$treatment_catholic[is.catholic]=1

## Assessing balance

# Function for t.test
test_two_dummy <- function (x, y, z) {
  t.test(x= data[, x][data[, y] == 1], y= data[, x][data[, z] == 1], var.equal = F)["statistic"]
}

# Specifying comparison
identifier <- c("treat_pentecostal", "treatment_peruana", "treatment_mixed", "treatment_maranatha", "treat_evangelical")
identifier2 <- c("treatment_catholic")

# Conducting t-tests
y <- data.frame()
names <-NULL
for (i in identifier) { 
  for (j in identifier2){
    tmp <- mapply(test_two_dummy, var, i, j)
    y <- rbind(y, tmp)
    names <- append(names, paste(i, j, sep="_"))
  }}
y <- t(y)
colnames(y) <- names[1:ncol(y)]
y <- as.data.frame(y)

# Creating appropriate variable names
y$variables <- rownames(y)
y$variables <-  substr(y$variables,1,nchar(y$variables)-10)
y$variables <- sapply(y$variables, function(x) {
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
  x
})
y$variables <- gsub("_", " ", y$variables)
y$variables <- gsub("X", "Age ", y$variables)

variablenames <- y$variables
print(y$variables)

variablenames <- c("Total population","Male","Female","Age below 1",
                   "Age 1-4","Age 5-14","Age 15-64","Age over 64",
                   "Natives","Migrants","Handycap","Blind","Mental issues",
                   "Polio","Underdeveloped extremities","Overdeveloped extremities",
                   "Other handycaps","Analphabets (total)","Analphabets (male)",
                   "Analphabets (female)","No education","Preschool","Primary education",
                   "Secondary education","Higher education","Workers 6-14",
                   "Workers over 14","Workers over 14 (employed)","Workers over 14 (unemployed)",
                   "Occupation: Agriculture","Occupation: Manufacturing, Mining, Construction",
                   "Occupation: Retail","Occupation: Hawker","Occupation: Other services",
                   "Occupation: Other" ,"Wage earner","Independent worker","Employer",
                   "Family Employment","Working in household","Employed in primary sector",
                   "Employed in secondary sector" ,"Employed in tertiary sector","Living together",
                   "Married","Living alone","Living in other form","Head household (male)",
                   "Head household (female)","Average children per mother","Women with more 4 children",
                   "Single mothers","Single mothers 12 19","Single mothers 20-29",
                   "Single mothers 30-49","Young mothers","Flats incl. unoccupied homes","Households",
                   "Average household size","Independent houses","Other houses","Ownership: own",
                   "Ownership: rental","Ownership: other","Walls: cement",
                   "Walls: stone","Walls: wood","Walls: other","Roof: corrugated steel",
                   "Roof: mats","Roof: straw","Roof: other","Water: public well",
                   "Water: tankwagon","Water: other","Sanitary: privy pit",
                   "Sanitory: other","Sanitory: none","Flats with electricity","Flats no electricity",
                   "Flats with only 1 room","Households only dormitories", "Households with shared toilet",
                   "Households with commercial rooms","No domestic appliances","Only radio",
                   "Radio & TV","Sewing machine","Tricycle")
y$variables <- variablenames


# convert to long format to facilitate face wrap
require(tidyr)
y <- y[-c(10)]
t_long <- gather(y, treatment, t_value, treat_pentecostal_treatment_catholic:treat_evangelical_treatment_catholic, factor_key=TRUE)

t_long$t_value <- abs(t_long$t_value)
t_long$sig <- as.numeric(t_long[, 3]>=1.96)

t_long$treatment <- as.character(t_long$treatment)
t_long$treatment <- gsub("_treatment_catholic", "", t_long$treatment)
t_long$treatment <- gsub("treat_", "", t_long$treatment)
t_long$treatment <- gsub("treatment_", "", t_long$treatment)
t_long$treatment <- sapply(t_long$treatment, function(x) {
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
  x
})
t_long$treatment <- as.factor(t_long$treatment)


require(ggplot2)
ggplot(t_long, aes(y= variables, x= t_value, col=as.factor(sig))) +
  geom_point() +
  facet_wrap(~treatment, nrow = 1) +
  geom_vline(xintercept = 1.96, colour="grey", linetype = "longdash") +
  xlab("Absolute t-values versus control") +
  ylab("Census Variables") +
  scale_colour_manual("Value of t-statistic", values = c("black","red"), labels = c("<1.96", ">=1.96"))

ggsave("census.pdf", width = 10, height = 10)


